	
********************************************************************************
** 	TITLE: b7_2_sag_roa_postcrime.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE: Saginaw ROA Cleaning for Post-Covid Criminal Cases
********************************************************************************
		set sortseed 13

set more off

****************************************************************Post-Covid Crime

/*
Monitoring the ROA data
*/

**Importing the Case numbers that came from Secure Server

import excel "$output_data/saginaw_case_postcrim_22dec2023.xls", sheet("Sheet1") clear

rename A case_id

tempfile server

duplicates drop case_id, force
//none are dropped.

save `server', replace

**Importing the intermediate case number information that came out of ROA webscraping

import delimited "$input_data/sag_roas/cases.csv", clear 

keep if _n < 0

tempfile scrape

//Creating an empty dataset to append to
saveold `scrape', replace 

forval t = 1(1)6{
import delimited "$input_data/sag_roas/cases0`t'.csv", clear 

append using `scrape'

saveold `scrape', replace
}

drop if case == "v1"

gen no_roa = (roa == "empty")

rename case case_id

drop left order

summ no_roa

saveold `scrape', replace

**Importing the case number information that came out of ROA parsing

import excel "$input_data/sag_roas/roasparsed_postcrim.xlsx", sheet("Sheet1") firstrow clear 

duplicates drop case_id, force
//none are dropped.

merge 1:1 case_id using `scrape'

**Dropping the non-criminal cases that were scraped in my 0.1 percent testing sample
drop if _merge == 1 

codebook _merge no_roa

merge 1:1 case_id using `server',gen(server)


gen roa_present = (roa_case_id != "")
list case_id roa_present no_roa if roa_present == 1 & no_roa == 1

drop no_roa _merge server

*Creating diff_roa variable
replace roa_case_id = subinstr(roa_case_id, "-", "",.)
gen diff_roa = .
replace diff_roa = 1 if roa_case_id != case_id & roa_case_id != "" 
replace diff_roa = 0 if  roa_case_id == case_id & roa_case_id != "" 

 
gen c_court =  (strpos(court, "Circuit") > 0)
replace c_court = . if court == ""

gen d_court =  (strpos(court, "District") > 0)
replace d_court = . if court == ""

gen criminal_court =  (strpos(court, "Criminal") > 0)
replace criminal_court = . if court == ""


//there are also Open, INACTIVE, Bench Warrant Issued, and Suspended
gen case_closed =  (strpos(case_status, "CLOSED") > 0)
replace case_closed = . if case_status == ""


/*
cases that messed up in parsing:
*/

gen mess =  (strpos(date_filed, "Ordinance Misdemeanor Criminal") > 0 | strpos(date_filed, "Statute Misdemeanor Criminal") > 0 )
replace mess = . if date_filed == "" & date_filed == "" 
/*
Public Defender Definition source: 
https://www.notafraidtowin.com/court-appointed-attorney-vs-retained-attorney/
Retained lawyer is private while court-appointed is a public defender
*/

gen public_defense = (strpos(attorney_type, "Court Appointed") > 0)
replace public_defense = . if attorney_type == ""



/*
Disposition, cases ending in plea deals
*/

gen plea = 0
gen sentenced = 0
forval t = 1(1)4{
	replace plea = 1 if strpos(d_disp_time_event`t', "Plea") > 0
	replace sentenced = 1 if strpos(d_disp_time_event`t', "Sentence") > 0
}

replace plea = . if roa_case_id == ""

replace sentenced = . if roa_case_id == ""

//Pre-release e-monitoring
gen pre_emonitor = 0
forval t = 1(1)533{
	replace pre_emonitor = 1 if strpos(e_event_date`t', "MONITOR ON RELEASE") > 0
}

summ diff_roa case_closed c_court d_court criminal_court mess public_defense plea sentenced pre_emonitor roa_present
summ diff_roa if c_court == 1

//Cleaning event-dates and event-comments
forval t = 1(1)533{
replace e_event_date`t' = subinstr(e_event_date`t',char(34), "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"[['\\n        ", "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"\\n        \\n        ', ' ', '\\n      ']]", "",.)
gen e_date`t' = substr(e_event_date`t',1,10)
gen e_dated`t' = date(e_date`t', "MDY")
gen e_month`t' = mofd(e_dated`t')
gen e_event`t' = substr(e_event_date`t',11,.)
drop e_event_date`t'

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)
}

forval t = 1(1)533{

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', '", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)

}



forval t = 1(1)28{
*d_disp_time_event1 
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',char(34), "",.)
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"', ' ']]", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"[['", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t'," ', '", "",.)

gen d_disp_date`t' = substr(d_disp_time_event`t',1,10)
gen d_disp_dated`t' = date(d_disp_date`t', "MDY")
gen d_disp_month`t' = mofd(d_disp_dated`t')
gen d_disp_event`t' = substr(d_disp_time_event`t',11,.)
drop d_disp_time_event`t'

*d_disp_charge_action1
replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',char(34), "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"['[\'\\n\\n      \', \'\\n        \', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

split d_disp_charge_action`t', parse(`"Query(function()"') generate(stub) limit(2)

drop stub2

replace d_disp_charge_action`t' = stub1

drop  stub1
}

order case_id roa_case_id court judge_name date_filed case_type case_status entitlement defendent attorney_name attorney_type c_* bs_* d_* e_*, first


saveold "$output_data/saginaw_roa_postcrim_parsed.dta", replace



use "$output_data/saginaw_roa_postcrim_parsed.dta", clear
preserve
keep if roa_present == 0 
keep case_id
export delimited using "$output_data/saginaw_case_postcrim_missing_01032024.txt", replace

restore

preserve 
keep if diff_roa == 1 
keep case_id roa_case_id
export delimited using "$output_data/saginaw_case_postcrim_diffroa_01032024.txt", replace

restore 